/*
 * cl_source_md -- OpenCL C source text for the kernel compute_lj_force, stored
 * as one C string (the adjacent literals below are concatenated by the C
 * compiler).  The comments placed between the literals are C comments and are
 * NOT part of the string; only the text inside the quotes is kernel source.
 *
 * Kernel arguments, as used by the kernel body:
 *   force      __global output; element [idx] is overwritten with the
 *              accumulated x/y/z contributions (w is written as 0).
 *   position   __global input; element [idx] is this work-item's position and
 *              element [jidx] the position of each neighbour.
 *   neighCount number of neighbour entries visited per work-item.
 *   neighList  __global input; the j-th neighbour index of work-item idx is
 *              read from neighList[j*inum + idx].
 *   cutsq      threshold compared against the squared distance.
 *   lj1, lj2   coefficients of the force term
 *              r2inv*r6inv*(lj1*r6inv - lj2); judging by the names this is
 *              presumably a Lennard-Jones force -- confirm against the host.
 *   inum       stride between consecutive neighbour "rows" in neighList.
 *
 * NOTE(review): idx is never compared against inum or any other bound, so the
 * kernel relies on the launch size matching the buffer sizes -- confirm in the
 * host code that enqueues it.
 * NOTE(review): there is no guard for a squared distance of exactly zero
 * (1.0f/0 followed by a multiply by zero deltas); presumably the neighbour
 * list never contains the work-item's own index -- confirm.
 */
const char *cl_source_md =
/*
 * Precision selection.  Exactly one of SINGLE_PRECISION, K_DOUBLE_PRECISION or
 * AMD_DOUBLE_PRECISION is expected to be set (presumably via build options from
 * the host -- confirm).  The two double variants differ only in which fp64
 * extension pragma they enable.  There is no #else branch: if none of the
 * macros is set, POSVECTYPE/FORCEVECTYPE/FPTYPE stay undefined.
 */
"#ifdef SINGLE_PRECISION\n"
"#define POSVECTYPE float4\n"
"#define FORCEVECTYPE float4\n"
"#define FPTYPE float\n"
"#elif K_DOUBLE_PRECISION\n"
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
"#define POSVECTYPE double4\n"
"#define FORCEVECTYPE double4\n"
"#define FPTYPE double\n"
"#elif AMD_DOUBLE_PRECISION\n"
"#pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
"#define POSVECTYPE double4\n"
"#define FORCEVECTYPE double4\n"
"#define FPTYPE double\n"
"#endif\n"
"\n"
/* Kernel entry point: one work-item handles one index idx. */
"__kernel void compute_lj_force(__global FORCEVECTYPE *force,\n"
"                               __global POSVECTYPE *position,\n"
"                               const int neighCount,\n"
"                               __global int* neighList,\n"
"                               const FPTYPE cutsq,\n"
"                               const FPTYPE lj1,\n"
"                               const FPTYPE lj2,\n"
"                               const int inum)\n"
"{\n"
"    uint idx = get_global_id(0);\n"
"\n"
/* The accumulator's w component is initialised to 0 and never updated. */
"    POSVECTYPE ipos = position[idx];\n"
"    FORCEVECTYPE f = {0.0f, 0.0f, 0.0f, 0.0f};\n"
"\n"
"    int j = 0;\n"
"    while (j < neighCount)\n"
"    {\n"
/* For a fixed j, work-items idx and idx+1 read adjacent neighList entries. */
"        int jidx = neighList[j*inum + idx];\n"
"\n"
"        // Uncoalesced read\n"
"        POSVECTYPE jpos = position[jidx];\n"
"\n"
/*
 * Despite its name, r2inv first holds the squared distance; it only becomes
 * the inverse squared distance inside the cutoff branch below.
 */
"        // Calculate distance\n"
"        FPTYPE delx = ipos.x - jpos.x;\n"
"        FPTYPE dely = ipos.y - jpos.y;\n"
"        FPTYPE delz = ipos.z - jpos.z;\n"
"        FPTYPE r2inv = delx*delx + dely*dely + delz*delz;\n"
"\n"
"        // If distance is less than cutoff, calculate force\n"
"        if (r2inv < cutsq)\n"
"        {\n"
"            r2inv = 1.0f/r2inv;\n"
"            FPTYPE r6inv = r2inv * r2inv * r2inv;\n"
"            FPTYPE forceC = r2inv*r6inv*(lj1*r6inv - lj2);\n"
"\n"
"            f.x += delx * forceC;\n"
"            f.y += dely * forceC;\n"
"            f.z += delz * forceC;\n"
"        }\n"
"        j++;\n"
"    }\n"
/* Each work-item writes only its own slot, force[idx]. */
"    // store the results\n"
"    force[idx] = f;\n"
"}\n"
;
